/**
 * Provides classes for detecting generated code.
 */

import javascript
import semmle.javascript.frameworks.Bundling
import semmle.javascript.frameworks.Emscripten
import semmle.javascript.frameworks.GWT
import semmle.javascript.SourceMaps

/**
 * A comment that marks generated code.
 *
 * This class is abstract: a comment belongs to it exactly when it belongs to
 * one of its concrete subclasses.
 */
abstract class GeneratedCodeMarkerComment extends Comment { }

/**
 * A source mapping comment, treated as a marker comment indicating generated code.
 */
private class SourceMappingCommentMarkerComment extends GeneratedCodeMarkerComment {
  SourceMappingCommentMarkerComment() { this instanceof SourceMappingComment }
}

/**
 * A marker comment emitted by one of the code generators recognized by
 * `codeGeneratorMarkerComment`.
 */
class CodeGeneratorMarkerComment extends GeneratedCodeMarkerComment {
  CodeGeneratorMarkerComment() {
    exists(string generator | codeGeneratorMarkerComment(this, generator))
  }

  /** Gets the name of the code generator that produced this marker comment. */
  string getGeneratorName() { codeGeneratorMarkerComment(this, result) }
}

/**
 * Holds if the text of comment `c` names the code generator `tool` as its producer.
 *
 * After optional leading whitespace and asterisks, the text has to open with a phrase
 * of the form "Generated by <tool>" (or "generated by <tool>"), optionally preceded by
 * "parser " or "Code " and optionally containing a "from ..." clause before "by".
 * Anything may follow the tool name.
 */
private predicate codeGeneratorMarkerComment(Comment c, string tool) {
  exists(string knownTools, string markerRegex |
    // alternation of all recognized generator names
    knownTools =
      "js_of_ocaml|CoffeeScript|LiveScript|dart2js|ANTLR|PEG\\.js|Opal|JSX|jison(?:-lex)?|(?:Microsoft \\(R\\) AutoRest Code Generator)|purs" and
    // capture group 1 is the generator name
    markerRegex =
      "(?s)[\\s*]*(?:parser |Code )?[gG]eneratedy? (?:from .*)?by (" + knownTools + ")\\b.*" and
    tool = c.getText().regexpCapture(markerRegex, 1)
  )
}

/**
 * A generic generated code marker comment.
 *
 * A comment qualifies if some line of its text contains a phrase such as
 * "This file was automatically generated" or "The following code is generated"
 * (matched case-insensitively).
 */
private class GenericGeneratedCodeMarkerComment extends GeneratedCodeMarkerComment {
  GenericGeneratedCodeMarkerComment() {
    exists(string entity, string was, string automatically |
      entity = "code|file|class|interface|art[ei]fact|module|script" and
      was = "was|is|has been" and
      automatically = "automatically |mechanically |auto[- ]?" and
      // Look for this pattern in each line of the comment. This has to be a search
      // (`regexpFind`) rather than `regexpMatch`: the latter requires the pattern to
      // cover the entire comment text, and because `.` does not match line terminators
      // the line-anchored pattern could then never match a multi-line comment.
      exists(
        this.getText()
            .regexpFind("(?im)^.*\\b(This|The following) (" + entity + ") (" + was + ") (" +
                automatically + ")?gener(e?)ated\\b.*$", _, _)
      )
    )
  }
}

/**
 * A comment warning against modifications, viewed as a marker comment indicating generated code.
 *
 * A comment qualifies if some line of its text contains either "Generated by ... Do not edit"
 * or "Any modifications to this file will be lost" (matched case-insensitively).
 */
private class DontModifyMarkerComment extends GeneratedCodeMarkerComment {
  DontModifyMarkerComment() {
    exists(string pattern |
      pattern =
        [
          "(?im)^.*\\bGenerated by\\b.*\\bDo not edit\\b.*$",
          "(?im)^.*\\bAny modifications to this file will be lost\\b.*$"
        ] and
      // Look for these patterns in each line of the comment. A search (`regexpFind`) is
      // needed here: `regexpMatch` requires the pattern to cover the entire comment text,
      // and since `.` does not match line terminators these line-anchored patterns could
      // then never match a multi-line comment.
      exists(this.getText().regexpFind(pattern, _, _))
    )
  }
}

/**
 * A script that looks like it was generated by dart2js, because it accesses a
 * variable named `$dart_deferred_initializers$` or `$dart_deferred_initializers`.
 */
private class DartGeneratedTopLevel extends TopLevel {
  DartGeneratedTopLevel() {
    exists(VarAccess access |
      access.getName() = ["$dart_deferred_initializers$", "$dart_deferred_initializers"] and
      this = access.getTopLevel()
    )
  }
}

/**
 * Holds if the invocations in `tl` pass, on average, more than 100 arguments per line
 * of `tl`, which is often a sign of generated code.
 */
private predicate hasManyInvocations(TopLevel tl) {
  exists(int lineCount, int argCount, float argsPerLine |
    lineCount = tl.getNumberOfLines() and
    // guard against division by zero
    lineCount > 0 and
    // total number of arguments over all invocations in `tl`
    argCount = sum(InvokeExpr call | call.getTopLevel() = tl | call.getNumArgument()) and
    argsPerLine = argCount.(float) / lineCount and
    // heuristic threshold
    argsPerLine > 100
  )
}

/**
 * Holds if `f` contains more than 1000 primitive literals and every impure expression
 * in it is a variable declarator, which is often a sign of generated data code.
 */
private predicate isData(File f) {
  // the only expressions with side effects are variable initializers ...
  forall(Expr impure | impure.getFile() = f and impure.isImpure() |
    impure instanceof VariableDeclarator
  ) and
  // ... and the file is full of primitive literals
  count(SyntacticConstants::PrimitiveLiteralConstant lit | lit.getFile() = f) > 1000
}

/**
 * Holds if `f` consists of a single line containing more than 100 expressions, all of
 * which are object, array, number, string, or Boolean literals. Such a file looks like
 * a non-trivial amount of JSON data, which is often a sign of generated data code.
 */
private predicate isJsonLine(File f) {
  f.getNumberOfLines() = 1 and
  count(Expr any | any.getFile() = f) > 100 and
  // no expression falls outside the JSON-like literal kinds
  not exists(Expr other |
    other.getFile() = f and
    not other instanceof ObjectExpr and
    not other instanceof ArrayExpr and
    not other instanceof NumberLiteral and
    not other instanceof StringLiteral and
    not other instanceof BooleanLiteral
  )
}

/**
 * Holds if `f` is a generated HTML file, as indicated by any of the following:
 *
 * - a `meta` element whose `name` attribute is `generator`;
 * - an HTML comment of the form "Generated by <tool> <major>.<minor>.<patch>";
 * - more than 20 HTML elements starting on the same line.
 */
private predicate isGeneratedHtml(File f) {
  // many elements crammed onto a single line
  countStartingHtmlElements(f, _) > 20
  or
  // a generator banner comment
  exists(HTML::CommentNode banner |
    banner.getFile() = f and
    banner.getText().regexpMatch("\\s*Generated by [\\w-]+ \\d+\\.\\d+\\.\\d+\\s*")
  )
  or
  // `<meta name="generator" ...>`
  exists(HTML::Element meta |
    meta.getAttributeByName("name").getValue() = "generator" and
    meta.getName() = "meta" and
    meta.getFile() = f
  )
}

/**
 * Gets an HTML element in file `f` whose location starts on line `l`.
 */
private HTML::Element getAStartingElement(File f, int l) {
  l = result.getLocation().getStartLine() and
  f = result.getFile()
}

/**
 * Gets the number of HTML elements that start at line `l` in file `f`.
 *
 * Has no result for lines on which no element starts.
 */
private int countStartingHtmlElements(File f, int l) {
  result = strictcount(HTML::Element elem | elem = getAStartingElement(f, l))
}

/**
 * Holds if the stem of `f` consists solely of decimal digits and the
 * extension of `f` is not `vue`.
 */
predicate isGeneratedFileName(File f) {
  // written as a negated equality so that files without an extension still qualify
  not f.getExtension() = "vue" and
  f.getStem().regexpMatch("[0-9]+")
}

/**
 * Holds if `tl` looks like it contains generated code, judging either by properties
 * of `tl` itself or by properties of the file that contains it.
 */
predicate isGenerated(TopLevel tl) {
  // evidence from the top-level itself
  tl.isMinified()
  or
  isBundle(tl)
  or
  hasManyInvocations(tl)
  or
  tl instanceof GwtGeneratedTopLevel
  or
  tl instanceof DartGeneratedTopLevel
  or
  tl = any(GeneratedCodeMarkerComment marker).getTopLevel()
  or
  // evidence from the enclosing file
  exists(File f | f = tl.getFile() |
    isData(f) or
    isJsonLine(f) or
    isGeneratedHtml(f) or
    isGeneratedFileName(f)
  )
}

/**
 * Holds if `file` looks like it contains generated code: either it is a generated
 * HTML file, or one of its top-levels looks generated.
 */
predicate isGeneratedCode(File file) {
  isGeneratedHtml(file)
  or
  exists(TopLevel tl | tl = file.getATopLevel() | isGenerated(tl))
}
